import re
# Validate an 11-digit mobile number that does not end in 4 or 7.
# The pattern is ten digits followed by one final digit drawn from
# 0-3, 5-6, 8 or 9. fullmatch ties the pattern to the whole string, so a
# trailing newline cannot slip through the way it can with a bare `$`.
phone = '12345678901'
result = re.fullmatch(r'\d{10}[0-35-689]', phone)
print(result)
# fullmatch returns None for a rejected number; only read the match if any.
if result is not None:
    print(result.group())
print('------------------------------')

# Crawler-style extraction: split a landline number into its parts.
phone = '010-12345678'
# Parentheses create capture groups: group 1 is the 3- or 4-digit area
# code, group 2 is the 8-digit number after the hyphen.
result = re.compile(r'(\d{3}|\d{4})-(\d{8})$').match(phone)
print(result)

# Print the pieces one per line: group 0 is the whole match, then the
# two captured groups.
print(*result.group(0, 1, 2), sep='\n')
print('------------------------------')
# Two sample snippets, each a single element wrapping plain text.
msg, msg1 = '<html>abc</html>', '<h1>hello</h1>'

# Loose form: the opening and closing tag names are matched independently,
# so nothing forces them to be equal. Group 1 is the text in between.
result = re.compile(r'<[0-9a-zA-Z]+>(.+)</[0-9a-zA-Z]+>').match(msg)
print(result)
print(result.group(1))

# Numbered back-reference: \1 must repeat whatever group 1 captured, so the
# closing tag has to carry the same name as the opening tag.
result = re.compile(r'<([0-9a-zA-Z]+)>(.+)</\1>$').match(msg1)
print(result)
# Group 1 is the tag name, group 2 the enclosed text; one per line.
print(*result.group(1, 2), sep='\n')

# Nested elements: \2 closes the inner tag and \1 closes the outer tag, so
# the closing tags must mirror the opening ones in reverse order.
msg = '<html><h1>abc</h1></html>'
result = re.compile(
    r'<([0-9a-zA-Z]+)><([0-9a-zA-Z]+)>(.+)</\2></\1>$'
).match(msg)
print(result)
# The three captures in order: outer tag name, inner tag name, inner text.
print(*result.groups(), sep='\n')
